library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.1
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Point the session at the project folder so the relative file names used
# below resolve there.
# NOTE(review): this absolute path is specific to one machine -- confirm or
# adjust it before running the script elsewhere.
setwd('C:/Users/mecha_000/Desktop/DataScience/Python/DataVisualizations/project')

# Load the Prosper loan data; both double and single quotes are accepted as
# quoting characters.
loans <- read.csv('prosperLoanData.csv', sep = ',', quote = "\"'")

# Convert ListingCreationDate to Date class, then derive the listing year
# from it as a number.
loans[["ListingCreationDate"]] <- as.Date(loans[["ListingCreationDate"]])
loans[["ListingCreationYear"]] <- as.numeric(
  format(loans[["ListingCreationDate"]], "%Y")
)
# Per-year summary (mean, median, sum) of loan amount, delinquent amount,
# investor count, Prosper score, estimated loss and estimated return, plus the
# number of listings per year. The result is written to CSV and a summary of
# the full data set is printed.
#
# Several of these columns contain missing values (AmountDelinquent,
# ProsperScore, EstimatedLoss, EstimatedReturn), so every aggregate is computed
# with na.rm = TRUE; without it a single NA turns the whole yearly figure into
# NA. For a year in which a column is entirely missing, the mean is NaN, the
# median is NA and the sum is 0.
#
# as.numeric() is kept so that sums are accumulated as doubles rather than
# integers.
loans.summary_by_year <-
  loans %>%
  group_by(ListingCreationYear) %>%
  summarise(
    mean_loanOriginalAmount =
      mean(as.numeric(LoanOriginalAmount), na.rm = TRUE),
    median_loanOriginalAmount =
      median(as.numeric(LoanOriginalAmount), na.rm = TRUE),
    sum_loanOriginalAmount =
      sum(as.numeric(LoanOriginalAmount), na.rm = TRUE),
    mean_amountDelinquent =
      mean(as.numeric(AmountDelinquent), na.rm = TRUE),
    median_amountDelinquent =
      median(as.numeric(AmountDelinquent), na.rm = TRUE),
    sum_amountDelinquent =
      sum(as.numeric(AmountDelinquent), na.rm = TRUE),
    mean_investors = mean(as.numeric(Investors), na.rm = TRUE),
    median_investors = median(as.numeric(Investors), na.rm = TRUE),
    sum_investors = sum(as.numeric(Investors), na.rm = TRUE),
    mean_prosperScore = mean(as.numeric(ProsperScore), na.rm = TRUE),
    median_prosperScore = median(as.numeric(ProsperScore), na.rm = TRUE),
    sum_prosperScore = sum(as.numeric(ProsperScore), na.rm = TRUE),
    mean_estimatedLoss = mean(as.numeric(EstimatedLoss), na.rm = TRUE),
    median_estimatedLoss = median(as.numeric(EstimatedLoss), na.rm = TRUE),
    sum_estimatedLoss = sum(as.numeric(EstimatedLoss), na.rm = TRUE),
    mean_estimatedReturn = mean(as.numeric(EstimatedReturn), na.rm = TRUE),
    median_estimatedReturn =
      median(as.numeric(EstimatedReturn), na.rm = TRUE),
    sum_estimatedReturn = sum(as.numeric(EstimatedReturn), na.rm = TRUE),
    count = n()
  ) %>%
  arrange(ListingCreationYear)

# Persist the yearly summary next to the input data (relative to the working
# directory).
write.csv(loans.summary_by_year, file="prosperLoanData_SummaryByYear.csv")

# Column-by-column overview of the full data set.
summary(loans)
## ListingKey ListingNumber ListingCreationDate
## 17A93590655669644DB4C06: 6 Min. : 4 Min. :2005-11-09
## 349D3587495831350F0F648: 4 1st Qu.: 400919 1st Qu.:2008-09-19
## 47C1359638497431975670B: 4 Median : 600554 Median :2012-06-16
## 8474358854651984137201C: 4 Mean : 627886 Mean :2011-07-08
## DE8535960513435199406CE: 4 3rd Qu.: 892634 3rd Qu.:2013-09-09
## 04C13599434217079754AEE: 3 Max. :1255725 Max. :2014-03-10
## (Other) :113912
## CreditGrade Term LoanStatus
## :84984 Min. :12.00 Current :56576
## C : 5649 1st Qu.:36.00 Completed :38074
## D : 5153 Median :36.00 Chargedoff :11992
## B : 4389 Mean :40.83 Defaulted : 5018
## AA : 3509 3rd Qu.:36.00 Past Due (1-15 days) : 806
## HR : 3508 Max. :60.00 Past Due (31-60 days): 363
## (Other): 6745 (Other) : 1108
## ClosedDate BorrowerAPR BorrowerRate
## :58848 Min. :0.00653 Min. :0.0000
## 2014-03-04 00:00:00: 105 1st Qu.:0.15629 1st Qu.:0.1340
## 2014-02-19 00:00:00: 100 Median :0.20976 Median :0.1840
## 2014-02-11 00:00:00: 92 Mean :0.21883 Mean :0.1928
## 2012-10-30 00:00:00: 81 3rd Qu.:0.28381 3rd Qu.:0.2500
## 2013-02-26 00:00:00: 78 Max. :0.51229 Max. :0.4975
## (Other) :54633 NA's :25
## LenderYield EstimatedEffectiveYield EstimatedLoss
## Min. :-0.0100 Min. :-0.183 Min. :0.005
## 1st Qu.: 0.1242 1st Qu.: 0.116 1st Qu.:0.042
## Median : 0.1730 Median : 0.162 Median :0.072
## Mean : 0.1827 Mean : 0.169 Mean :0.080
## 3rd Qu.: 0.2400 3rd Qu.: 0.224 3rd Qu.:0.112
## Max. : 0.4925 Max. : 0.320 Max. :0.366
## NA's :29084 NA's :29084
## EstimatedReturn ProsperRating..numeric. ProsperRating..Alpha.
## Min. :-0.183 Min. :1.000 :29084
## 1st Qu.: 0.074 1st Qu.:3.000 C :18345
## Median : 0.092 Median :4.000 B :15581
## Mean : 0.096 Mean :4.072 A :14551
## 3rd Qu.: 0.117 3rd Qu.:5.000 D :14274
## Max. : 0.284 Max. :7.000 E : 9795
## NA's :29084 NA's :29084 (Other):12307
## ProsperScore ListingCategory..numeric. BorrowerState
## Min. : 1.00 Min. : 0.000 CA :14717
## 1st Qu.: 4.00 1st Qu.: 1.000 TX : 6842
## Median : 6.00 Median : 1.000 NY : 6729
## Mean : 5.95 Mean : 2.774 FL : 6720
## 3rd Qu.: 8.00 3rd Qu.: 3.000 IL : 5921
## Max. :11.00 Max. :20.000 : 5515
## NA's :29084 (Other):67493
## Occupation EmploymentStatus
## Other :28617 Employed :67322
## Professional :13628 Full-time :26355
## Computer Programmer : 4478 Self-employed: 6134
## Executive : 4311 Not available: 5347
## Teacher : 3759 Other : 3806
## Administrative Assistant: 3688 : 2255
## (Other) :55456 (Other) : 2718
## EmploymentStatusDuration IsBorrowerHomeowner CurrentlyInGroup
## Min. : 0.00 False:56459 False:101218
## 1st Qu.: 26.00 True :57478 True : 12719
## Median : 67.00
## Mean : 96.07
## 3rd Qu.:137.00
## Max. :755.00
## NA's :7625
## GroupKey DateCreditPulled
## :100596 2013-12-23 09:38:12: 6
## 783C3371218786870A73D20: 1140 2013-11-21 09:09:41: 4
## 3D4D3366260257624AB272D: 916 2013-12-06 05:43:16: 4
## 6A3B336601725506917317E: 698 2014-01-14 20:17:49: 4
## FEF83377364176536637E50: 611 2014-02-09 12:14:41: 4
## C9643379247860156A00EC0: 342 2013-09-27 22:04:54: 3
## (Other) : 9634 (Other) :113912
## CreditScoreRangeLower CreditScoreRangeUpper
## Min. : 0.0 Min. : 19.0
## 1st Qu.:660.0 1st Qu.:679.0
## Median :680.0 Median :699.0
## Mean :685.6 Mean :704.6
## 3rd Qu.:720.0 3rd Qu.:739.0
## Max. :880.0 Max. :899.0
## NA's :591 NA's :591
## FirstRecordedCreditLine CurrentCreditLines OpenCreditLines
## : 697 Min. : 0.00 Min. : 0.00
## 1993-12-01 00:00:00: 185 1st Qu.: 7.00 1st Qu.: 6.00
## 1994-11-01 00:00:00: 178 Median :10.00 Median : 9.00
## 1995-11-01 00:00:00: 168 Mean :10.32 Mean : 9.26
## 1990-04-01 00:00:00: 161 3rd Qu.:13.00 3rd Qu.:12.00
## 1995-03-01 00:00:00: 159 Max. :59.00 Max. :54.00
## (Other) :112389 NA's :7604 NA's :7604
## TotalCreditLinespast7years OpenRevolvingAccounts
## Min. : 2.00 Min. : 0.00
## 1st Qu.: 17.00 1st Qu.: 4.00
## Median : 25.00 Median : 6.00
## Mean : 26.75 Mean : 6.97
## 3rd Qu.: 35.00 3rd Qu.: 9.00
## Max. :136.00 Max. :51.00
## NA's :697
## OpenRevolvingMonthlyPayment InquiriesLast6Months TotalInquiries
## Min. : 0.0 Min. : 0.000 Min. : 0.000
## 1st Qu.: 114.0 1st Qu.: 0.000 1st Qu.: 2.000
## Median : 271.0 Median : 1.000 Median : 4.000
## Mean : 398.3 Mean : 1.435 Mean : 5.584
## 3rd Qu.: 525.0 3rd Qu.: 2.000 3rd Qu.: 7.000
## Max. :14985.0 Max. :105.000 Max. :379.000
## NA's :697 NA's :1159
## CurrentDelinquencies AmountDelinquent DelinquenciesLast7Years
## Min. : 0.0000 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 0.0 1st Qu.: 0.000
## Median : 0.0000 Median : 0.0 Median : 0.000
## Mean : 0.5921 Mean : 984.5 Mean : 4.155
## 3rd Qu.: 0.0000 3rd Qu.: 0.0 3rd Qu.: 3.000
## Max. :83.0000 Max. :463881.0 Max. :99.000
## NA's :697 NA's :7622 NA's :990
## PublicRecordsLast10Years PublicRecordsLast12Months RevolvingCreditBalance
## Min. : 0.0000 Min. : 0.000 Min. : 0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 3121
## Median : 0.0000 Median : 0.000 Median : 8549
## Mean : 0.3126 Mean : 0.015 Mean : 17599
## 3rd Qu.: 0.0000 3rd Qu.: 0.000 3rd Qu.: 19521
## Max. :38.0000 Max. :20.000 Max. :1435667
## NA's :697 NA's :7604 NA's :7604
## BankcardUtilization AvailableBankcardCredit TotalTrades
## Min. :0.000 Min. : 0 Min. : 0.00
## 1st Qu.:0.310 1st Qu.: 880 1st Qu.: 15.00
## Median :0.600 Median : 4100 Median : 22.00
## Mean :0.561 Mean : 11210 Mean : 23.23
## 3rd Qu.:0.840 3rd Qu.: 13180 3rd Qu.: 30.00
## Max. :5.950 Max. :646285 Max. :126.00
## NA's :7604 NA's :7544 NA's :7544
## TradesNeverDelinquent..percentage. TradesOpenedLast6Months
## Min. :0.000 Min. : 0.000
## 1st Qu.:0.820 1st Qu.: 0.000
## Median :0.940 Median : 0.000
## Mean :0.886 Mean : 0.802
## 3rd Qu.:1.000 3rd Qu.: 1.000
## Max. :1.000 Max. :20.000
## NA's :7544 NA's :7544
## DebtToIncomeRatio IncomeRange IncomeVerifiable
## Min. : 0.000 $25,000-49,999:32192 False: 8669
## 1st Qu.: 0.140 $50,000-74,999:31050 True :105268
## Median : 0.220 $100,000+ :17337
## Mean : 0.276 $75,000-99,999:16916
## 3rd Qu.: 0.320 Not displayed : 7741
## Max. :10.010 $1-24,999 : 7274
## NA's :8554 (Other) : 1427
## StatedMonthlyIncome LoanKey TotalProsperLoans
## Min. : 0 CB1B37030986463208432A1: 6 Min. :0.00
## 1st Qu.: 3200 2DEE3698211017519D7333F: 4 1st Qu.:1.00
## Median : 4667 9F4B37043517554537C364C: 4 Median :1.00
## Mean : 5608 D895370150591392337ED6D: 4 Mean :1.42
## 3rd Qu.: 6825 E6FB37073953690388BC56D: 4 3rd Qu.:2.00
## Max. :1750003 0D8F37036734373301ED419: 3 Max. :8.00
## (Other) :113912 NA's :91852
## TotalProsperPaymentsBilled OnTimeProsperPayments
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 9.00 1st Qu.: 9.00
## Median : 16.00 Median : 15.00
## Mean : 22.93 Mean : 22.27
## 3rd Qu.: 33.00 3rd Qu.: 32.00
## Max. :141.00 Max. :141.00
## NA's :91852 NA's :91852
## ProsperPaymentsLessThanOneMonthLate ProsperPaymentsOneMonthPlusLate
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00
## Mean : 0.61 Mean : 0.05
## 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :42.00 Max. :21.00
## NA's :91852 NA's :91852
## ProsperPrincipalBorrowed ProsperPrincipalOutstanding
## Min. : 0 Min. : 0
## 1st Qu.: 3500 1st Qu.: 0
## Median : 6000 Median : 1627
## Mean : 8472 Mean : 2930
## 3rd Qu.:11000 3rd Qu.: 4127
## Max. :72499 Max. :23451
## NA's :91852 NA's :91852
## ScorexChangeAtTimeOfListing LoanCurrentDaysDelinquent
## Min. :-209.00 Min. : 0.0
## 1st Qu.: -35.00 1st Qu.: 0.0
## Median : -3.00 Median : 0.0
## Mean : -3.22 Mean : 152.8
## 3rd Qu.: 25.00 3rd Qu.: 0.0
## Max. : 286.00 Max. :2704.0
## NA's :95009
## LoanFirstDefaultedCycleNumber LoanMonthsSinceOrigination LoanNumber
## Min. : 0.00 Min. : 0.0 Min. : 1
## 1st Qu.: 9.00 1st Qu.: 6.0 1st Qu.: 37332
## Median :14.00 Median : 21.0 Median : 68599
## Mean :16.27 Mean : 31.9 Mean : 69444
## 3rd Qu.:22.00 3rd Qu.: 65.0 3rd Qu.:101901
## Max. :44.00 Max. :100.0 Max. :136486
## NA's :96985
## LoanOriginalAmount LoanOriginationDate LoanOriginationQuarter
## Min. : 1000 2014-01-22 00:00:00: 491 Q4 2013:14450
## 1st Qu.: 4000 2013-11-13 00:00:00: 490 Q1 2014:12172
## Median : 6500 2014-02-19 00:00:00: 439 Q3 2013: 9180
## Mean : 8337 2013-10-16 00:00:00: 434 Q2 2013: 7099
## 3rd Qu.:12000 2014-01-28 00:00:00: 339 Q3 2012: 5632
## Max. :35000 2013-09-24 00:00:00: 316 Q2 2012: 5061
## (Other) :111428 (Other):60343
## MemberKey MonthlyLoanPayment LP_CustomerPayments
## 63CA34120866140639431C9: 9 Min. : 0.0 Min. : -2.35
## 16083364744933457E57FB9: 8 1st Qu.: 131.6 1st Qu.: 1005.76
## 3A2F3380477699707C81385: 8 Median : 217.7 Median : 2583.83
## 4D9C3403302047712AD0CDD: 8 Mean : 272.5 Mean : 4183.08
## 739C338135235294782AE75: 8 3rd Qu.: 371.6 3rd Qu.: 5548.40
## 7E1733653050264822FAA3D: 8 Max. :2251.5 Max. :40702.39
## (Other) :113888
## LP_CustomerPrincipalPayments LP_InterestandFees LP_ServiceFees
## Min. : 0.0 Min. : -2.35 Min. :-664.87
## 1st Qu.: 500.9 1st Qu.: 274.87 1st Qu.: -73.18
## Median : 1587.5 Median : 700.84 Median : -34.44
## Mean : 3105.5 Mean : 1077.54 Mean : -54.73
## 3rd Qu.: 4000.0 3rd Qu.: 1458.54 3rd Qu.: -13.92
## Max. :35000.0 Max. :15617.03 Max. : 32.06
##
## LP_CollectionFees LP_GrossPrincipalLoss LP_NetPrincipalLoss
## Min. :-9274.75 Min. : -94.2 Min. : -954.5
## 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.00 Median : 0.0 Median : 0.0
## Mean : -14.24 Mean : 700.4 Mean : 681.4
## 3rd Qu.: 0.00 3rd Qu.: 0.0 3rd Qu.: 0.0
## Max. : 0.00 Max. :25000.0 Max. :25000.0
##
## LP_NonPrincipalRecoverypayments PercentFunded Recommendations
## Min. : 0.00 Min. :0.7000 Min. : 0.00000
## 1st Qu.: 0.00 1st Qu.:1.0000 1st Qu.: 0.00000
## Median : 0.00 Median :1.0000 Median : 0.00000
## Mean : 25.14 Mean :0.9986 Mean : 0.04803
## 3rd Qu.: 0.00 3rd Qu.:1.0000 3rd Qu.: 0.00000
## Max. :21117.90 Max. :1.0125 Max. :39.00000
##
## InvestmentFromFriendsCount InvestmentFromFriendsAmount Investors
## Min. : 0.00000 Min. : 0.00 Min. : 1.00
## 1st Qu.: 0.00000 1st Qu.: 0.00 1st Qu.: 2.00
## Median : 0.00000 Median : 0.00 Median : 44.00
## Mean : 0.02346 Mean : 16.55 Mean : 80.48
## 3rd Qu.: 0.00000 3rd Qu.: 0.00 3rd Qu.: 115.00
## Max. :33.00000 Max. :25000.00 Max. :1189.00
##
## ListingCreationYear
## Min. :2005
## 1st Qu.:2008
## Median :2012
## Mean :2011
## 3rd Qu.:2013
## Max. :2014
##
# Histogram of loans by amount (bins of 500).
# ggplot() + geom_histogram() is used instead of qplot(), which is deprecated
# in current ggplot2 releases.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 500)

# Histogram of loans by listing date (default binning).
ggplot(loans, aes(x = ListingCreationDate)) +
  geom_histogram()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Univariate distributions. Numeric columns are drawn as histograms with an
# explicit bin width; categorical columns are drawn as bar charts of counts.
# ggplot() with an explicit geom replaces qplot(), which is deprecated in
# current ggplot2 releases.

# Histogram of loans by delinquent amount.
ggplot(loans, aes(x = AmountDelinquent)) +
  geom_histogram(binwidth = 500)

# Bar chart of loan counts by occupation.
ggplot(loans, aes(x = Occupation)) +
  geom_bar()

# Bar chart of loan counts by state.
ggplot(loans, aes(x = BorrowerState)) +
  geom_bar()

# Bar chart of loan counts by income range.
ggplot(loans, aes(x = IncomeRange)) +
  geom_bar()

# Histogram of loans by number of investors.
ggplot(loans, aes(x = Investors)) +
  geom_histogram(binwidth = 10)

# Histogram of loans by lower credit score.
ggplot(loans, aes(x = CreditScoreRangeLower)) +
  geom_histogram(binwidth = 10)

# Histogram of loans by upper credit score.
ggplot(loans, aes(x = CreditScoreRangeUpper)) +
  geom_histogram(binwidth = 10)

# Histogram of loans by Prosper risk score.
ggplot(loans, aes(x = ProsperScore)) +
  geom_histogram(binwidth = 1)

# Histogram of loans by estimated loss.
ggplot(loans, aes(x = EstimatedLoss)) +
  geom_histogram(binwidth = .01)

# Histogram of loans by estimated return.
ggplot(loans, aes(x = EstimatedReturn)) +
  geom_histogram(binwidth = .01)

# Histogram of loan amounts (bins of 1000), one panel per state.
# ggplot() + geom_histogram() replaces qplot(), which is deprecated in current
# ggplot2 releases.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 1000) +
  facet_wrap(~ BorrowerState) +
  labs(title = "Distribution of loan amounts per state",
       x = "Loan amount", y = "Number of loans")

# Histogram of loan amounts (bins of 1000), one panel per occupation.
ggplot(loans, aes(x = LoanOriginalAmount)) +
  geom_histogram(binwidth = 1000) +
  facet_wrap(~ Occupation) +
  labs(title = "Distribution of loan amounts per occupation",
       x = "Loan amount", y = "Number of loans")

# Box plot of the loan amount distribution within each income range.
ggplot(data = loans,
       mapping = aes(x = IncomeRange, y = LoanOriginalAmount)) +
  geom_boxplot() +
  ggtitle("Income to loan amount") +
  xlab("Income") +
  ylab("Loan amount")

# Box plot of the loan amount distribution within each occupation.
# The title and x-axis label name the occupation variable that is actually
# plotted (they previously read "Income", copied from the income plot).
ggplot(
  aes(x = Occupation, y = LoanOriginalAmount),
  data = loans) +
  geom_boxplot() +
  labs(
    title = "Occupation to loan amount",
    x = "Occupation", y = "Loan amount")

# Box plot of the loan amount distribution within each borrower state.
# The title and x-axis label name the state variable that is actually plotted
# (they previously read "Income", copied from the income plot).
ggplot(
  aes(x = BorrowerState, y = LoanOriginalAmount),
  data = loans) +
  geom_boxplot() +
  labs(
    title = "State to loan amount",
    x = "State", y = "Loan amount")

###
# Scatter plots of monthly payment against loan amount, colored by estimated
# return. The base plot is built once and reused for each faceted variant.
# The full `loans` data frame is plotted directly: the earlier
# `subset(loans, )` calls had no condition and therefore selected every row.
payment_vs_amount <-
  ggplot(loans, aes(x = MonthlyLoanPayment, y = LoanOriginalAmount)) +
  geom_point(aes(color = EstimatedReturn)) +
  labs(
    title = "Monthly payment to loan amount",
    x = "Monthly payment", y = "Loan amount")

# All loans in a single panel.
payment_vs_amount

# Faceted by Prosper's risk score.
payment_vs_amount + facet_wrap(~ ProsperScore)

# Faceted by occupation.
payment_vs_amount + facet_wrap(~ Occupation)

# Faceted by state.
payment_vs_amount + facet_wrap(~ BorrowerState)

# Faceted by income range.
payment_vs_amount + facet_wrap(~ IncomeRange)

###
# Scatterplot of estimated loss to estimated return, colored by loan amount.
ggplot(data = loans,
       mapping = aes(x = EstimatedLoss, y = EstimatedReturn,
                     color = LoanOriginalAmount)) +
  geom_point() +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 29084 rows containing missing values (geom_point).

# Estimated loss against estimated return, colored by loan amount, with one
# panel per occupation.
ggplot(data = loans,
       mapping = aes(x = EstimatedLoss, y = EstimatedReturn,
                     color = LoanOriginalAmount)) +
  geom_point() +
  facet_wrap(~ Occupation) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 2255 rows containing missing values (geom_point).
## Warning: Removed 659 rows containing missing values (geom_point).
## Warning: Removed 980 rows containing missing values (geom_point).
## Warning: Removed 867 rows containing missing values (geom_point).
## Warning: Removed 64 rows containing missing values (geom_point).
## Warning: Removed 180 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 66 rows containing missing values (geom_point).
## Warning: Removed 37 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 318 rows containing missing values (geom_point).
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 1048 rows containing missing values (geom_point).
## Warning: Removed 1242 rows containing missing values (geom_point).
## Warning: Removed 464 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 225 rows containing missing values (geom_point).
## Warning: Removed 271 rows containing missing values (geom_point).
## Warning: Removed 843 rows containing missing values (geom_point).
## Warning: Removed 103 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 286 rows containing missing values (geom_point).
## Warning: Removed 234 rows containing missing values (geom_point).
## Warning: Removed 63 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 378 rows containing missing values (geom_point).
## Warning: Removed 64 rows containing missing values (geom_point).
## Warning: Removed 226 rows containing missing values (geom_point).
## Warning: Removed 448 rows containing missing values (geom_point).
## Warning: Removed 94 rows containing missing values (geom_point).
## Warning: Removed 109 rows containing missing values (geom_point).
## Warning: Removed 76 rows containing missing values (geom_point).
## Warning: Removed 330 rows containing missing values (geom_point).
## Warning: Removed 7300 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 301 rows containing missing values (geom_point).
## Warning: Removed 140 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 3086 rows containing missing values (geom_point).
## Warning: Removed 105 rows containing missing values (geom_point).
## Warning: Removed 27 rows containing missing values (geom_point).
## Warning: Removed 291 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 601 rows containing missing values (geom_point).
## Warning: Removed 1096 rows containing missing values (geom_point).
## Warning: Removed 768 rows containing missing values (geom_point).
## Warning: Removed 80 rows containing missing values (geom_point).
## Warning: Removed 566 rows containing missing values (geom_point).
## Warning: Removed 166 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 133 rows containing missing values (geom_point).
## Warning: Removed 85 rows containing missing values (geom_point).
## Warning: Removed 118 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 871 rows containing missing values (geom_point).
## Warning: Removed 76 rows containing missing values (geom_point).
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 92 rows containing missing values (geom_point).
## Warning: Removed 154 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 309 rows containing missing values (geom_point).
## Warning: Removed 142 rows containing missing values (geom_point).

# Estimated loss against estimated return, colored by loan amount, with one
# panel per borrower state.
ggplot(data = loans,
       mapping = aes(x = EstimatedLoss, y = EstimatedReturn,
                     color = LoanOriginalAmount)) +
  geom_point() +
  facet_wrap(~ BorrowerState) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 5515 rows containing missing values (geom_point).
## Warning: Removed 33 rows containing missing values (geom_point).
## Warning: Removed 492 rows containing missing values (geom_point).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 544 rows containing missing values (geom_point).
## Warning: Removed 3956 rows containing missing values (geom_point).
## Warning: Removed 481 rows containing missing values (geom_point).
## Warning: Removed 135 rows containing missing values (geom_point).
## Warning: Removed 54 rows containing missing values (geom_point).
## Warning: Removed 33 rows containing missing values (geom_point).
## Warning: Removed 1314 rows containing missing values (geom_point).
## Warning: Removed 1661 rows containing missing values (geom_point).
## Warning: Removed 67 rows containing missing values (geom_point).
## Warning: Removed 186 rows containing missing values (geom_point).
## Warning: Removed 196 rows containing missing values (geom_point).
## Warning: Removed 1657 rows containing missing values (geom_point).
## Warning: Removed 426 rows containing missing values (geom_point).
## Warning: Removed 208 rows containing missing values (geom_point).
## Warning: Removed 95 rows containing missing values (geom_point).
## Warning: Removed 113 rows containing missing values (geom_point).
## Warning: Removed 411 rows containing missing values (geom_point).
## Warning: Removed 580 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 971 rows containing missing values (geom_point).
## Warning: Removed 603 rows containing missing values (geom_point).
## Warning: Removed 822 rows containing missing values (geom_point).
## Warning: Removed 112 rows containing missing values (geom_point).
## Warning: Removed 109 rows containing missing values (geom_point).
## Warning: Removed 645 rows containing missing values (geom_point).
## Warning: Removed 52 rows containing missing values (geom_point).
## Warning: Removed 119 rows containing missing values (geom_point).
## Warning: Removed 108 rows containing missing values (geom_point).
## Warning: Removed 372 rows containing missing values (geom_point).
## Warning: Removed 141 rows containing missing values (geom_point).
## Warning: Removed 66 rows containing missing values (geom_point).
## Warning: Removed 885 rows containing missing values (geom_point).
## Warning: Removed 824 rows containing missing values (geom_point).
## Warning: Removed 239 rows containing missing values (geom_point).
## Warning: Removed 603 rows containing missing values (geom_point).
## Warning: Removed 294 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_point).
## Warning: Removed 128 rows containing missing values (geom_point).
## Warning: Removed 199 rows containing missing values (geom_point).
## Warning: Removed 1208 rows containing missing values (geom_point).
## Warning: Removed 355 rows containing missing values (geom_point).
## Warning: Removed 499 rows containing missing values (geom_point).
## Warning: Removed 36 rows containing missing values (geom_point).
## Warning: Removed 895 rows containing missing values (geom_point).
## Warning: Removed 321 rows containing missing values (geom_point).
## Warning: Removed 81 rows containing missing values (geom_point).
## Warning: Removed 27 rows containing missing values (geom_point).

# Estimated loss against estimated return, colored by loan amount, with one
# panel per income range.
ggplot(data = loans,
       mapping = aes(x = EstimatedLoss, y = EstimatedReturn,
                     color = LoanOriginalAmount)) +
  geom_point() +
  facet_wrap(~ IncomeRange) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 576 rows containing missing values (geom_point).
## Warning: Removed 2620 rows containing missing values (geom_point).
## Warning: Removed 2132 rows containing missing values (geom_point).
## Warning: Removed 8017 rows containing missing values (geom_point).
## Warning: Removed 5423 rows containing missing values (geom_point).
## Warning: Removed 2418 rows containing missing values (geom_point).
## Warning: Removed 7741 rows containing missing values (geom_point).
## Warning: Removed 157 rows containing missing values (geom_point).

# Estimated loss against estimated return, colored by loan amount, with one
# panel per Prosper risk score.
ggplot(data = loans,
       mapping = aes(x = EstimatedLoss, y = EstimatedReturn,
                     color = LoanOriginalAmount)) +
  geom_point() +
  facet_wrap(~ ProsperScore) +
  ggtitle("Estimated loss to return") +
  xlab("Estimated loss") +
  ylab("Estimated return")
## Warning: Removed 29084 rows containing missing values (geom_point).

###
# Scatterplot of (lower-bound) credit score to loan amount, colored by the
# number of investors.
ggplot(data = loans,
       mapping = aes(x = CreditScoreRangeLower, y = LoanOriginalAmount,
                     color = Investors)) +
  geom_point() +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# Credit score (lower bound) against loan amount, colored by number of
# investors, with one panel per borrower state.
ggplot(data = loans,
       mapping = aes(x = CreditScoreRangeLower, y = LoanOriginalAmount,
                     color = Investors)) +
  geom_point() +
  facet_wrap(~ BorrowerState) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# Credit score (lower bound) against loan amount, colored by number of
# investors, with one panel per occupation.
ggplot(data = loans,
       mapping = aes(x = CreditScoreRangeLower, y = LoanOriginalAmount,
                     color = Investors)) +
  geom_point() +
  facet_wrap(~ Occupation) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 589 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# Credit score (lower bound) against loan amount, colored by number of
# investors, with one panel per income range.
ggplot(data = loans,
       mapping = aes(x = CreditScoreRangeLower, y = LoanOriginalAmount,
                     color = Investors)) +
  geom_point() +
  facet_wrap(~ IncomeRange) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

# Credit score (lower bound) against loan amount, colored by number of
# investors, with one panel per Prosper risk score.
ggplot(data = loans,
       mapping = aes(x = CreditScoreRangeLower, y = LoanOriginalAmount,
                     color = Investors)) +
  geom_point() +
  facet_wrap(~ ProsperScore) +
  ggtitle("Credit score to loan amount") +
  xlab("Credit score") +
  ylab("Loan amount")
## Warning: Removed 591 rows containing missing values (geom_point).

###
# Scatter plots of loan amount against listing date, colored by the number of
# investors. The base plot is built once and reused for each faceted variant
# instead of repeating the same ggplot() call five times.
amount_by_date <-
  ggplot(loans, aes(x = ListingCreationDate, y = LoanOriginalAmount)) +
  geom_point(aes(color = Investors)) +
  labs(
    title = "Loan amounts by date",
    x = "Date", y = "Loan amount")

# All loans in a single panel.
amount_by_date

# Faceted by Prosper risk score.
amount_by_date + facet_wrap(~ ProsperScore)

# Faceted by income range.
amount_by_date + facet_wrap(~ IncomeRange)

# Faceted by occupation.
amount_by_date + facet_wrap(~ Occupation)

# Faceted by state.
amount_by_date + facet_wrap(~ BorrowerState)

###
# Scatterplot of loan amounts to delinquent amounts, colored by estimated
# return.
ggplot(data = loans,
       mapping = aes(x = LoanOriginalAmount, y = AmountDelinquent,
                     color = EstimatedReturn)) +
  geom_point() +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 7622 rows containing missing values (geom_point).

# Loan amount against delinquent amount, colored by estimated return, with one
# panel per borrower state.
ggplot(data = loans,
       mapping = aes(x = LoanOriginalAmount, y = AmountDelinquent,
                     color = EstimatedReturn)) +
  geom_point() +
  facet_wrap(~ BorrowerState) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 3778 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 96 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 95 rows containing missing values (geom_point).
## Warning: Removed 708 rows containing missing values (geom_point).
## Warning: Removed 65 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 172 rows containing missing values (geom_point).
## Warning: Removed 318 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 221 rows containing missing values (geom_point).
## Warning: Removed 68 rows containing missing values (geom_point).
## Warning: Removed 40 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 29 rows containing missing values (geom_point).
## Warning: Removed 62 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 178 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 149 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 91 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).
## Warning: Removed 45 rows containing missing values (geom_point).
## Warning: Removed 25 rows containing missing values (geom_point).
## Warning: Removed 137 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 106 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 636 rows containing missing values (geom_point).
## Warning: Removed 53 rows containing missing values (geom_point).
## Warning: Removed 60 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 138 rows containing missing values (geom_point).
## Warning: Removed 47 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# Scatterplot of delinquent amount against original loan amount, colored by
# estimated return, drawn as one facet panel per occupation.
ggplot(data = loans,
       mapping = aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(mapping = aes(color = EstimatedReturn)) +
  facet_wrap(~ Occupation) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 2255 rows containing missing values (geom_point).
## Warning: Removed 114 rows containing missing values (geom_point).
## Warning: Removed 181 rows containing missing values (geom_point).
## Warning: Removed 155 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 301 rows containing missing values (geom_point).
## Warning: Removed 262 rows containing missing values (geom_point).
## Warning: Removed 91 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 38 rows containing missing values (geom_point).
## Warning: Removed 50 rows containing missing values (geom_point).
## Warning: Removed 160 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 51 rows containing missing values (geom_point).
## Warning: Removed 31 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 85 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 41 rows containing missing values (geom_point).
## Warning: Removed 88 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 25 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 48 rows containing missing values (geom_point).
## Warning: Removed 1537 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 49 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 591 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 62 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: Removed 128 rows containing missing values (geom_point).
## Warning: Removed 258 rows containing missing values (geom_point).
## Warning: Removed 156 rows containing missing values (geom_point).
## Warning: Removed 19 rows containing missing values (geom_point).
## Warning: Removed 102 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 28 rows containing missing values (geom_point).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 32 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 146 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 23 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 56 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_point).

# Scatterplot of delinquent amount against original loan amount, colored by
# estimated return, drawn as one facet panel per Prosper risk score.
ggplot(data = loans,
       mapping = aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(mapping = aes(color = EstimatedReturn)) +
  facet_wrap(~ ProsperScore) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 7622 rows containing missing values (geom_point).

# Scatterplot of delinquent amount against original loan amount, colored by
# estimated return, drawn as one facet panel per income range.
ggplot(data = loans,
       mapping = aes(x = LoanOriginalAmount, y = AmountDelinquent)) +
  geom_point(mapping = aes(color = EstimatedReturn)) +
  facet_wrap(~ IncomeRange) +
  ggtitle("Loan amounts compared to delinquent amounts") +
  xlab("Loan amount") +
  ylab("Delinquent amount")
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 7602 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

###
# Line graph of the mean original loan amount per listing year. Point size
# encodes the number of loans in that year (the `count` column).
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_loanOriginalAmount)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  scale_size(name = "Number of loans") +
  labs(
    x = "Year",
    y = "Average loan amount",
    title = "Average loan amounts by year"
  )

# Line graph of the mean delinquent amount per listing year. Point size
# encodes the number of loans in that year (the `count` column). Years whose
# mean is missing are dropped by ggplot2, as the warnings below report.
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_amountDelinquent)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  scale_size(name = "Number of loans") +
  labs(
    x = "Year",
    y = "Average delinquent amount",
    title = "Average amount delinquent by year"
  )
## Warning: Removed 4 rows containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).

# Line graph of the mean number of investors per listing year. Point size
# encodes the number of loans in that year (the `count` column).
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_investors)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  scale_size(name = "Number of loans") +
  labs(
    x = "Year",
    y = "Average number of investors",
    title = "Average number of investors by year"
  )

# Line graph of the mean Prosper score per listing year. Point size encodes
# the number of loans in that year (the `count` column). Years whose mean is
# missing are dropped by ggplot2, as the warnings below report.
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_prosperScore)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  scale_size(name = "Number of loans") +
  labs(
    x = "Year",
    y = "Average prosper score",
    title = "Average Prosper score by year"
  )
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

# Line graph of the mean estimated loss per listing year. Point size encodes
# the number of loans in that year (the `count` column). Years whose mean is
# missing are dropped by ggplot2, as the warnings below report.
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_estimatedLoss)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  scale_size(name = "Number of loans") +
  labs(
    x = "Year",
    y = "Average estimated loss",
    title = "Average estimated loss by year"
  )
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

# Line graph of the mean estimated return per listing year. Point size
# encodes the number of loans in that year (the `count` column). Years whose
# mean is missing are dropped by ggplot2, as the warnings below report.
ggplot(data = loans.summary_by_year,
       mapping = aes(x = ListingCreationYear, y = mean_estimatedReturn)) +
  geom_line() +
  geom_point(mapping = aes(size = count)) +
  labs(
    # The title previously read "Average estimated loss by year" (copied from
    # the estimated-loss plot); this chart shows mean_estimatedReturn.
    title = "Average estimated return by year",
    x = "Year",
    y = "Average estimated return"
  ) +
  scale_size(name = "Number of loans")
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).
